@@ -1,5 +1,6 @@ |
||
1 | 1 |
# Changes |
2 | 2 |
|
3 |
+* Jul 30, 2015 - RssAgent can configure the order of events created via `events_order`. |
|
3 | 4 |
* Jul 29, 2015 - WebsiteAgent can configure the order of events created via `events_order`. |
4 | 5 |
* Jul 29, 2015 - DataOutputAgent can configure the order of events in the output via `events_order`. |
5 | 6 |
* Jul 20, 2015 - Control Links (used by the SchedulerAgent) are correctly exported in Scenarios. |
@@ -9,6 +9,8 @@ module Agents |
||
9 | 9 |
can_dry_run! |
10 | 10 |
default_schedule "every_1d" |
11 | 11 |
|
12 |
+ DEFAULT_EVENTS_ORDER = [['{{date_published}}', 'time'], ['{{last_updated}}', 'time']] |
|
13 |
+ |
|
12 | 14 |
description do |
13 | 15 |
<<-MD |
14 | 16 |
This Agent consumes RSS feeds and emits events when they change. |
@@ -29,6 +31,12 @@ module Agents |
||
29 | 31 |
* `disable_url_encoding` - Set to `true` to disable url encoding. |
30 | 32 |
* `user_agent` - A custom User-Agent name (default: "Faraday v#{Faraday::VERSION}"). |
31 | 33 |
* `max_events_per_run` - Limit number of events created (items parsed) per run for feed. |
34 |
+ |
|
35 |
+ # Ordering Events |
|
36 |
+ |
|
37 |
+ #{description_events_order} |
|
38 |
+ |
|
39 |
+ In this Agent, the default value for `events_order` is `#{DEFAULT_EVENTS_ORDER.to_json}`. |
|
32 | 40 |
MD |
33 | 41 |
end |
34 | 42 |
|
@@ -70,6 +78,11 @@ module Agents |
||
70 | 78 |
end |
71 | 79 |
|
72 | 80 |
validate_web_request_options! |
81 |
+ validate_events_order |
|
82 |
+ end |
|
83 |
+ |
|
84 |
+ def events_order |
|
85 |
+ super.presence || DEFAULT_EVENTS_ORDER |
|
73 | 86 |
end |
74 | 87 |
|
75 | 88 |
def check |
@@ -77,26 +90,15 @@ module Agents |
||
77 | 90 |
response = faraday.get(url) |
78 | 91 |
if response.success? |
79 | 92 |
feed = FeedNormalizer::FeedNormalizer.parse(response.body) |
80 |
- feed.clean! if interpolated['clean'] == 'true' |
|
93 |
+ feed.clean! if boolify(interpolated['clean']) |
|
81 | 94 |
max_events = (interpolated['max_events_per_run'].presence || 0).to_i |
82 | 95 |
created_event_count = 0 |
83 |
- feed.entries.sort_by { |entry| [entry.date_published, entry.last_updated] }.each.with_index do |entry, index| |
|
96 |
+ sort_events(feed_to_events(feed)).each.with_index do |event, index| |
|
84 | 97 |
break if max_events && max_events > 0 && index >= max_events |
85 |
- entry_id = get_entry_id(entry) |
|
98 |
+ entry_id = event.payload[:id] |
|
86 | 99 |
if check_and_track(entry_id) |
87 | 100 |
created_event_count += 1 |
88 |
- create_event(payload: { |
|
89 |
- id: entry_id, |
|
90 |
- date_published: entry.date_published, |
|
91 |
- last_updated: entry.last_updated, |
|
92 |
- url: entry.url, |
|
93 |
- urls: entry.urls, |
|
94 |
- description: entry.description, |
|
95 |
- content: entry.content, |
|
96 |
- title: entry.title, |
|
97 |
- authors: entry.authors, |
|
98 |
- categories: entry.categories |
|
99 |
- }) |
|
101 |
+ create_event(event) |
|
100 | 102 |
end |
101 | 103 |
end |
102 | 104 |
log "Fetched #{url} and created #{created_event_count} event(s)." |
@@ -122,5 +124,22 @@ module Agents |
||
122 | 124 |
true |
123 | 125 |
end |
124 | 126 |
end |
127 |
+ |
|
128 |
+ def feed_to_events(feed) |
|
129 |
+ feed.entries.map { |entry| |
|
130 |
+ Event.new(payload: { |
|
131 |
+ id: get_entry_id(entry), |
|
132 |
+ date_published: entry.date_published, |
|
133 |
+ last_updated: entry.last_updated, |
|
134 |
+ url: entry.url, |
|
135 |
+ urls: entry.urls, |
|
136 |
+ description: entry.description, |
|
137 |
+ content: entry.content, |
|
138 |
+ title: entry.title, |
|
139 |
+ authors: entry.authors, |
|
140 |
+ categories: entry.categories |
|
141 |
+ }) |
|
142 |
+ } |
|
143 |
+ end |
|
125 | 144 |
end |
126 | 145 |
end |
@@ -66,6 +66,21 @@ describe Agents::RssAgent do |
||
66 | 66 |
expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"]) |
67 | 67 |
end |
68 | 68 |
|
69 |
+ it "should emit items as events in the order specified in the events_order option" do |
|
70 |
+ expect { |
|
71 |
+ agent.options['events_order'] = ['{{title | replace_regex: "^[[:space:]]+", "" }}'] |
|
72 |
+ agent.check |
|
73 |
+ }.to change { agent.events.count }.by(20) |
|
74 |
+ |
|
75 |
+ first, *, last = agent.events.last(20) |
|
76 |
+ expect(first.payload['title'].strip).to eq('upgrade rails and gems') |
|
77 |
+ expect(first.payload['url']).to eq("https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01") |
|
78 |
+ expect(first.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"]) |
|
79 |
+ expect(last.payload['title'].strip).to eq('Dashed line in a diagram indicates propagate_immediately being false.') |
|
80 |
+ expect(last.payload['url']).to eq("https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535") |
|
81 |
+ expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"]) |
|
82 |
+ end |
|
83 |
+ |
|
69 | 84 |
it "should track ids and not re-emit the same item when seen again" do |
70 | 85 |
agent.check |
71 | 86 |
expect(agent.memory['seen_ids']).to eq(agent.events.map {|e| e.payload['id'] }) |